Setup

library(ngsReports)
library(magrittr)
library(scales)
library(pander)
library(tidyverse)
# Use the black-and-white ggplot2 theme for every plot in this report.
theme_set(theme_bw())

# Load the FastQC reports for the demultiplexed reads and for the alignments.
# The pattern is anchored so that only files ending in ".zip" are picked up;
# a bare "zip" is a regex that matches the substring anywhere in the file name.
deMuxFqc <- list.files("../2_demux/FastQC/", pattern = "\\.zip$", full.names = TRUE) %>%
    getFastqcData()
alnFqc <- list.files("../4_aligned/FastQC/", pattern = "\\.zip$", full.names = TRUE) %>%
    getFastqcData()

# GC-content object stored alongside this report; passed to plotGcContent()
# as `GCobject` further down.
oryGC <- read_rds("oryGC.RDS")

Compare Reads With Alignments

*Comparison of library sizes before and after alignment. Some degree of multiple alignment was observed in all libraries*

Comparison of library sizes before and after alignment. Some degree of multiple alignment was observed in all libraries

# Summarise library sizes after alignment, one row per sampling population.
# Population is assigned from the sample name: names containing "gc" -> 1996,
# names containing "ora" -> 2012, everything else -> 2010.
readTotals(alnFqc) %>%
    mutate(
        # Strip the suffix as a literal string; as a plain regex each "."
        # would match any character.
        Sample = str_remove(Filename, fixed(".sorted.bam")),
        Population = case_when(
            grepl("gc", Sample) ~ "1996",
            grepl("ora", Sample) ~ "2012",
            # Fallback for every sample not matched above
            TRUE ~ "2010"
        )
    ) %>%
    # Namespace-qualified so a masking select() from another attached
    # package cannot be picked up by mistake.
    dplyr::select(Sample, Population, Total_Sequences) %>%
    group_by(Population) %>%
    summarise(
        Samples = n(),
        `Smallest Library` = min(Total_Sequences),
        `Median Library` = median(Total_Sequences),
        `Largest Library` = max(Total_Sequences),
        `Total Alignments` = sum(Total_Sequences)
    ) %>%
    pander(
        big.mark = ",",
        split.tables = Inf,
        justify = "rrrrrr",
        caption = "Summary of Library Sizes After Alignment"
    )
Summary of Library Sizes After Alignment
Population Samples Smallest Library Median Library Largest Library Total Alignments
1996 59 2,097,881 6,869,250 20,114,691 458,139,215
2010 37 1,556,040 5,155,139 9,391,888 203,789,421
2012 53 4,395,062 8,564,072 18,132,894 472,659,756

Identify Low Quality Samples by GC content

# File names of the alignments flagged as potentially low quality.
lowQ <- paste0(
    c("pt1125", "gc2709", "gc2700", "gc2776", "ora663"),
    ".sorted.bam"
)

Potential low-quality samples were identified by GC content as pt1125.sorted.bam, gc2709.sorted.bam, gc2700.sorted.bam, gc2776.sorted.bam, and ora663.sorted.bam.

# GC content line plot restricted to the files listed in lowQ.
plotGcContent(
    alnFqc[fileName(alnFqc) %in% lowQ],
    plotType = "line",
    usePlotly = TRUE,
    GCobject = oryGC,
    species = "Ocuniculus"
)

GC content of potential low quality samples

# GC content line plot for every file that is NOT listed in lowQ.
plotGcContent(
    alnFqc[!(fileName(alnFqc) %in% lowQ)],
    plotType = "line",
    usePlotly = TRUE,
    GCobject = oryGC,
    species = "Ocuniculus"
)

GC content of remaining samples

Alignments from these samples should be moved and placed into a separate folder to ensure their exclusion from the stacks pipeline.

Session Info

# Record the R version and package versions used to build this report.
pander(sessionInfo())

R version 3.5.2 (2018-12-20)

**Platform:** x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_AU.UTF-8, LC_NUMERIC=C, LC_TIME=en_AU.UTF-8, LC_COLLATE=en_AU.UTF-8, LC_MONETARY=en_AU.UTF-8, LC_MESSAGES=en_AU.UTF-8, LC_PAPER=en_AU.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_AU.UTF-8 and LC_IDENTIFICATION=C

attached base packages: stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: bindrcpp(v.0.2.2), forcats(v.0.3.0), stringr(v.1.3.1), dplyr(v.0.7.8), purrr(v.0.2.5), readr(v.1.3.1), tidyr(v.0.8.2), tidyverse(v.1.2.1), pander(v.0.6.3), scales(v.1.0.0), magrittr(v.1.5), ngsReports(v.0.99.0), tibble(v.2.0.0), ggplot2(v.3.1.0), fastqcTheoreticalGC(v.0.0.1), BSgenome(v.1.50.0), rtracklayer(v.1.42.1), Biostrings(v.2.50.2), XVector(v.0.22.0), GenomicRanges(v.1.34.0), GenomeInfoDb(v.1.18.1), IRanges(v.2.16.0), S4Vectors(v.0.20.1) and BiocGenerics(v.0.28.0)

loaded via a namespace (and not attached): nlme(v.3.1-137), bitops(v.1.0-6), matrixStats(v.0.54.0), lubridate(v.1.7.4), RColorBrewer(v.1.1-2), httr(v.1.4.0), tools(v.3.5.2), backports(v.1.1.3), R6(v.2.3.0), lazyeval(v.0.2.1), colorspace(v.1.3-2), withr(v.2.1.2), tidyselect(v.0.2.5), compiler(v.3.5.2), cli(v.1.0.1), rvest(v.0.3.2), Biobase(v.2.42.0), Cairo(v.1.5-9), xml2(v.1.2.0), DelayedArray(v.0.8.0), plotly(v.4.8.0), ggdendro(v.0.1-20), labeling(v.0.3), digest(v.0.6.18), Rsamtools(v.1.34.0), rmarkdown(v.1.11), pkgconfig(v.2.0.2), htmltools(v.0.3.6), highr(v.0.7), htmlwidgets(v.1.3), rlang(v.0.3.0.1), readxl(v.1.2.0), rstudioapi(v.0.8), shiny(v.1.2.0), bindr(v.0.1.1), generics(v.0.0.2), zoo(v.1.8-4), hwriter(v.1.3.2), jsonlite(v.1.6), crosstalk(v.1.0.0), BiocParallel(v.1.16.5), RCurl(v.1.95-4.11), GenomeInfoDbData(v.1.2.0), Matrix(v.1.2-15), Rcpp(v.1.0.0), munsell(v.0.5.0), stringi(v.1.2.4), yaml(v.2.2.0), MASS(v.7.3-51.1), SummarizedExperiment(v.1.12.0), zlibbioc(v.1.28.0), plyr(v.1.8.4), grid(v.3.5.2), promises(v.1.0.1), crayon(v.1.3.4), lattice(v.0.20-38), haven(v.2.0.0), hms(v.0.4.2), knitr(v.1.21), pillar(v.1.3.1), XML(v.3.98-1.16), glue(v.1.3.0), evaluate(v.0.12), ShortRead(v.1.40.0), latticeExtra(v.0.6-28), data.table(v.1.11.8), modelr(v.0.1.2), httpuv(v.1.4.5.1), cellranger(v.1.1.0), gtable(v.0.2.0), assertthat(v.0.2.0), xfun(v.0.4), mime(v.0.6), xtable(v.1.8-3), broom(v.0.5.1), later(v.0.7.5), viridisLite(v.0.3.0) and GenomicAlignments(v.1.18.1)